home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Atari Mega Archive 1
/
Atari Mega Archive - Volume 1.iso
/
mint
/
editors
/
mjovesrc.zoo
/
re.c
< prev
next >
Wrap
C/C++ Source or Header
|
1992-04-04
|
19KB
|
979 lines
/***************************************************************************
* This program is Copyright (C) 1986, 1987, 1988 by Jonathan Payne. JOVE *
* is provided to you without charge, and with no warranty. You may give *
* away copies of JOVE, including sources, provided that this notice is *
* included in all the files. *
***************************************************************************/
/* search package */
#include "jove.h"
#include "re.h"
#include "ctype.h"
/* Forward declarations of routines local to this file. */
private void search proto((int, bool, bool));
private int do_comp proto((struct RE_block *, int));

/* global search string */
private char searchstr[128];

/* replace search string */
char rep_search[128];

/* contains replacement string */
char rep_str[128];

/* ignore case? */
bool CaseIgnore = OFF;

/* wrap at end of buffer? */
bool WrapScan = OFF;

/* use regular expressions */
bool UseRE = OFF;

/* Case-independent comparison of two characters. */
#define cind_cmp(a, b) (CharUpcase(a) == CharUpcase(b))

/* State of the pattern reader REgetc(): a one-character push-back slot
 * (-1 when it is empty) and the cursor into the pattern text.
 */
private int REpeekc;
private char *REptr;
/* Return the next character of the pattern being compiled.
 *
 * A character previously pushed back into REpeekc is handed out first
 * (and the slot is reset to -1).  Otherwise the character under REptr
 * is returned and the cursor advances; at the end of the pattern '\0'
 * is returned and the cursor stays on the terminator.
 */
private int
REgetc()
{
	int pushed = REpeekc;

	if (pushed != -1) {
		/* consume the pushed-back character */
		REpeekc = -1;
		return pushed;
	}
	if (*REptr == '\0')
		return '\0';
	return *REptr++;
}
/* Verb codes of the compiled pattern.
 *
 * Every verb code below is an even number, which leaves the low bit
 * free: STAR is OR'd into a verb to mark it as repeated.
 */
#define STAR 01 /* Match any number of last RE. */
#define AT_BOL 2 /* ^ */
#define AT_EOL 4 /* $ */
#define AT_BOW 6 /* \< */
#define AT_EOW 8 /* \> */
#define OPENP 10 /* \( */
#define CLOSEP 12 /* \) */
#define CURLYB 14 /* \{ */
/* NOSTR has the same value as CURLYB, the last verb that cannot be starred. */
#define NOSTR 14 /* Codes <= NOSTR can't be *'d. */
#define ANYC (NOSTR+2) /* . */
#define NORMC (ANYC+2) /* normal character */
#define CINDC (NORMC+2) /* case independent character */
#define ONE_OF (CINDC+2) /* [xxx] */
#define NONE_OF (ONE_OF+2) /* [^xxx] */
#define BACKREF (NONE_OF+2) /* \# */
#define EOP (BACKREF+2) /* end of pattern */
/* ONE_OF/NONE_OF is represented as a bit vector.
* These symbols parameterize the representation.
*/
#define BYTESIZE 8 /* bits per byte of the vector */
/* SETSIZE is the length of the bit vector in bytes.  The MiNT build
 * sizes it for 0200 (128) character codes instead of NCHARS.
 */
#ifndef MiNT
#define SETSIZE (NCHARS / BYTESIZE)
#else
#define SETSIZE (0200 / BYTESIZE)
#endif /* MiNT */
/* Index of the vector byte holding character c, and the mask of c's bit
 * within that byte.
 */
#define SETBYTE(c) ((c) / BYTESIZE)
#define SETBIT(c) (1 << ((c) % BYTESIZE))
#define NPAR 10 /* [0-9] - 0th is the entire matched string, i.e. & */
/* Cursors shared by REcompile() and do_comp() while a pattern is compiled. */
private char *comp_ptr, /* next free byte of the compile buffer */
**alt_p, /* next free slot in the table of alternates */
**alt_endp; /* start of the table of alternates plus NALTS */
/* Compile `pattern' into `re_blk'.
 *
 * `re' selects regular-expression compilation (OKAY_RE) over plain
 * string compilation (NORM).  The start of each compiled alternate is
 * recorded in re_blk->r_alternates, which is terminated with NULL.
 *
 * Afterwards, when there is exactly one alternate, its beginning is
 * inspected to set two hints in re_blk: r_anchored when the pattern
 * starts with ^, and r_firstc (upper-cased) when it starts with a
 * literal string.
 */
void
REcompile(pattern, re, re_blk)
char *pattern;
bool re;
struct RE_block *re_blk;
{
	char *scan;

	/* reset the pattern reader */
	REpeekc = -1;
	REptr = pattern;

	/* reset the output cursors and open the first alternate */
	comp_ptr = re_blk->r_compbuf;
	alt_p = re_blk->r_alternates;
	alt_endp = alt_p + NALTS;
	*alt_p++ = comp_ptr;
	re_blk->r_nparens = 0;

	(void) do_comp(re_blk, re ? OKAY_RE : NORM);
	*alt_p = NULL;

	re_blk->r_anchored = NO;
	re_blk->r_firstc = '\0';

	/* the hints below only make sense for a single alternate */
	if (re_blk->r_alternates[1] != NULL)
		return;

	/* step over verbs that match no text: paren markers occupy two
	 * bytes, word-boundary markers one
	 */
	scan = re_blk->r_alternates[0];
	for (;;) {
		if (*scan == OPENP || *scan == CLOSEP)
			scan += 2;
		else if (*scan == AT_BOW || *scan == AT_EOW)
			scan += 1;
		else
			break;
	}

	if (*scan == AT_BOL) {
		/* don't set firstc -- won't work */
		re_blk->r_anchored = YES;
	} else if (*scan == NORMC || *scan == CINDC) {
		/* scan[1] is the string's count byte; the text starts at scan[2] */
		re_blk->r_firstc = CharUpcase(scan[2]);
	}
}
/* compile the pattern into an internal code */
private int
do_comp(re_blk, kind)
struct RE_block *re_blk;
int kind;
{
char *this_verb,
*prev_verb,
*start_p,
*comp_endp;
int parens[NPAR],
*parenp,
c,
ret_code;
parenp = parens;
this_verb = NULL;
ret_code = 1;
comp_endp = &re_blk->r_compbuf[COMPSIZE - 6];
/* wrap the whole expression around (implied) parens */
if (kind == OKAY_RE) {
*comp_ptr++ = OPENP;
*comp_ptr++ = re_blk->r_nparens;
*parenp++ = re_blk->r_nparens++;
}
start_p = comp_ptr;
while ((c = REgetc()) != '\0') {
if (comp_ptr > comp_endp) {
toolong:
complain("Search string too long/complex.");
}
prev_verb = this_verb;
this_verb = comp_ptr;
if (kind == NORM && strchr(".[*", c) != NULL)
goto defchar;
switch (c) {
case '\\':
switch (c = REgetc()) {
case '\0':
complain("[Premature end of pattern]");
/*NOTREACHED*/
case '{':
{
char *wcntp; /* word count */
*comp_ptr++ = CURLYB;
wcntp = comp_ptr;
*comp_ptr++ = 0;
for (;;) {
int comp_val;
char *comp_len;
comp_len = comp_ptr++;
comp_val = do_comp(re_blk, IN_CB);
*comp_len = comp_ptr - comp_len;
(*wcntp) += 1;
if (comp_val == 0)
break;
}
break;
}
case '}':
if (kind != IN_CB)
complain("Unexpected \\}.");
ret_code = 0;
goto outahere;
case '(':
if (re_blk->r_nparens >= NPAR)
complain("Too many ('s; max is %d.", NPAR);
*comp_ptr++ = OPENP;
*comp_ptr++ = re_blk->r_nparens;
*parenp++ = re_blk->r_nparens++;
break;
case ')':
if (parenp == parens)
complain("Too many )'s.");
*comp_ptr++ = CLOSEP;
*comp_ptr++ = *--parenp;
break;
case '|':
if (alt_p >= alt_endp)
complain("Too many alternates; max %d.", NALTS);
/* close off previous alternate */
*comp_ptr++ = CLOSEP;
*comp_ptr++ = *--parenp;
*comp_ptr++ = EOP;
*alt_p++ = comp_ptr;
/* start a new one */
re_blk->r_nparens = 0;
*comp_ptr++ = OPENP;
*comp_ptr++ = re_blk->r_nparens;
*parenp++ = re_blk->r_nparens++;
start_p = comp_ptr;
break;
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
*comp_ptr++ = BACKREF;
*comp_ptr++ = c - '0';
break;
case '<':
*comp_ptr++ = AT_BOW;
break;
case '>':
*comp_ptr++ = AT_EOW;
break;
default:
goto defchar;
}
break;
case ',':
if (kind != IN_CB)
goto defchar;
goto outahere;
case '.':
*comp_ptr++ = ANYC;
break;
case '^':
if (comp_ptr == start_p) {
*comp_ptr++ = AT_BOL;
break;
}
goto defchar;
case '$':
if ((REpeekc = REgetc()) != '\0' && REpeekc != '\\')
goto defchar;
*comp_ptr++ = AT_EOL;
break;
case '[':
{
int chrcnt;
*comp_ptr++ = ONE_OF;
if (comp_ptr + SETSIZE >= comp_endp)
goto toolong;
byte_zero(comp_ptr, (size_t) SETSIZE);
if ((REpeekc = REgetc()) == '^') {
*this_verb = NONE_OF;
/* Get it for real this time. */
(void) REgetc();
}
chrcnt = 0;
while ((c = REgetc()) != ']' && c != '\0') {
if (c == '\\') {
c = REgetc();
if (c == '\0')
break;
} else if ((REpeekc = REgetc()) == '-') {
int i;
i = c;
(void) REgetc(); /* reread '-' */
c = REgetc();
if (c == '\0')
break;
while (i < c) {
comp_ptr[SETBYTE(i)] |= SETBIT(i);
i += 1;
}
}
comp_ptr[SETBYTE(c)] |= SETBIT(c);
chrcnt += 1;
}
if (c == '\0')
complain("Missing ].");
if (chrcnt == 0)
complain("Empty [].");
comp_ptr += SETSIZE;
break;
}
case '*':
if (prev_verb == NULL || *prev_verb <= NOSTR || (*prev_verb&STAR)!=0)
goto defchar;
if (*prev_verb == NORMC || *prev_verb == CINDC) {
char lastc = comp_ptr[-1];
/* The * operator applies only to the
* previous character. Since we were
* building a string-matching command
* (NORMC or CINDC), we must split it
* up and work with the last character.
*
* Note that the STARed versions of these
* commands do not operate on strings, and
* so do not need or have character counts.
*/
if (prev_verb[1] == 1) {
/* Only one char in string:
* delete old command.
*/
this_verb = prev_verb;
} else {
/* Several chars in string:
* strip off the last.
* New verb is derived from old.
*/
prev_verb[1] -= 1;
this_verb -= 1;
*this_verb = *prev_verb;
}
comp_ptr = this_verb + 1;
*comp_ptr++ = lastc;
} else {
/* This command is just the previous one,
* whose verb we will modify.
*/
this_verb = prev_verb;
}
*this_verb |= STAR;
break;
default:
defchar:
if ((prev_verb == NULL) ||
!(*prev_verb == NORMC || *prev_verb == CINDC)) {
/* create new string command */
*comp_ptr++ = (CaseIgnore) ? CINDC : NORMC;
*comp_ptr++ = 0;
} else {
/* merge this into previous string command */
this_verb = prev_verb;
}
this_verb[1] +=